import torch


# Report the installed PyTorch version. A bare expression is only echoed in an
# interactive/notebook session, so print it explicitly when run as a script.
print(torch.__version__)


from transformers import AutoTokenizer


# Load the tokenizer published under the "bert-base-chinese" checkpoint name.
tokenizer = AutoTokenizer.from_pretrained("bert-base-chinese")

# Encode a single sentence and show the resulting encoding.
# (print is used instead of a bare expression so the output also appears when
# this file is executed as a plain script rather than in a notebook.)
result = tokenizer("并广泛动员社会各界的力量")
print(result)

# Show the ids of the tokenizer's special tokens, then the tokens themselves.
print(tokenizer.all_special_ids)
print(tokenizer.all_special_tokens)


from transformers import BertTokenizerFast


# Load the fast BERT tokenizer for the same "bert-base-chinese" checkpoint.
tokenizer1 = BertTokenizerFast.from_pretrained("bert-base-chinese")

# Encode a single sentence.
# (Every result below is printed explicitly; a bare expression would be
# silently discarded when this file runs as a script.)
result = tokenizer1("并广泛动员社会各界的力量")
print(result)

# Encode two sentences passed as separate arguments (a sentence pair).
result = tokenizer1("第一个句子", "第二个句子")
print(result)

# Decode a list of token ids back into text.
# NOTE(review): these ids look like the input_ids produced for the sentence
# pair above — confirm against that printed output.
print(
    tokenizer1.decode(
        [101, 5018, 671, 702, 1368, 2094, 102, 5018, 753, 702, 1368, 2094, 102]
    )
)

# Encode a batch of two sentences with padding enabled.
result = tokenizer1(["第一句", "第二个句子"], padding=True)
print(result)



